**************************************************************
* ELECTRICITY SPILOVERS PROJECT
*  DATA PREPARATION & CLEANING
**************************************************************

****************
*Billing data controls
****************
frame create billing
frame change billing
use $data_main\elecbilling_20170919.dta, clear //Raw monthly billing data

gen mar14_use=elec_use if ym_start==650
	gsort HHID -mar14_use
	by HHID: carryforward mar14_use, replace
gen apr14_use=elec_use if ym_start==651
	gsort HHID -apr14_use
	by HHID: carryforward apr14_use, replace
gen may14_use=elec_use if ym_start==652
	gsort HHID -may14_use
	by HHID: carryforward may14_use, replace
gen jun14_use=elec_use if ym_start==653
	gsort HHID -jun14_use
	by HHID: carryforward jun14_use, replace
gen jul14_use=elec_use if ym_start==654
	gsort HHID -jul14_use
	by HHID: carryforward jul14_use, replace
gen aug14_use=elec_use if ym_start==655
	gsort HHID -aug14_use
	by HHID: carryforward aug14_use, replace
gen sep14_use=elec_use if ym_start==656
	gsort HHID -sep14_use
	by HHID: carryforward sep14_use, replace
gen oct14_use=elec_use if ym_start==657
	gsort HHID -oct14_use
	by HHID: carryforward oct14_use, replace
gen nov14_use=elec_use if ym_start==658
	gsort HHID -nov14_use
	by HHID: carryforward nov14_use, replace
gen dec14_use=elec_use if ym_start==659
	gsort HHID -dec14_use
	by HHID: carryforward dec14_use, replace
gen jan15_use=elec_use if ym_start==660
	gsort HHID -jan15_use
	by HHID: carryforward jan15_use, replace
gen feb15_use=elec_use if ym_start==661
	gsort HHID -feb15_use
	by HHID: carryforward feb15_use, replace

keep HHID elec_use_summer elec_use_annual elec_use_winter mar14_use ///
	 apr14_use may14_use jun14_use jul14_use aug14_use sep14_use ///
	 oct14_use nov14_use dec14_use jan15_use feb15_use flag_outlier
sort HHID
duplicates drop
save $data_main\baseline_euse_$outputdate, replace
frame change data_main

****************	
*DROPPING UNNECESSARY DATA

*Dropping HWS and HWSPlus households
drop if hws==1|hws_alt==1|hwsp==1
drop hws hws_alt hwsp
 
*Keeping only data in relevant study sample
drop if date<19844
drop if date>20605

*Dropping negative readings - Errors in AMI data
 foreach y in wuse euse {
		drop if `y'<0
}
*	
*Calendar indicators and variables
drop year month day
gen month=month(date)
gen year=year(date)
gen ym=ym(year,month)

****************	
*MERGING RELEVANT DATA	

*Merging in weather data
merge m:1 date hour using $data_main\weather_20160817.dta
	drop if _merge==2  
	drop _merge humidity 

*Merging in electricity billing data
merge m:1 HHID using $data_main\baseline_euse_$outputdate.dta
	drop if _merge==2
	drop _merge	
	
*Merging assessor data	
merge m:1 HHID using $data_main\PoolAssessorData_20170622_destring.dta
	drop if _merge==2
	drop _merge
drop landvalue landbaseyear improvementvalue impbaseyear totallandimpvalue 
drop if pool==1

*Merging historical water data	
merge m:1 HHID using $data_main\BaselineWater_20171018.dta
	drop if _merge==2
	drop _merge
drop WS HWS HWSPlus

****************
*GENERATING VARIABLES AND INDICATORS	

*Pre & Post Treatment Indicator
* Notes: -Post-treatment begins at May 15, 2015 (20223)
gen post=0 
	replace post=1 if date>=20223
	label var post "Post Treatment"

*Updating WS treatment 
* Notes: - Treatment defined as assigned to WS and post treatment
gen treat=ws
replace ws=ws*post	
	label var ws "WaterSmart"
 
*Flagging AMI outliers
gen flag_euse=0
	replace flag_euse=1 if euse>20 & !missing(euse)

*Temperature Indicators
forvalues f = 60(5)85 {
	gen temp_`f' = 0
		replace temp_`f' = 1 if temp>=`f' & temp<`f'+5
	gen temp_ws_`f' = 0
		replace temp_ws_`f' = 1 if temp>=`f' & temp<`f'+5 & ws==1
}
*
*Labeling variables
label var temp_60 "Control Use (<65F)"
label var temp_ws_60 "WaterSmart (<65F)"
label var temp_65 "Control Use (65F-70F)"
label var temp_ws_65 "WaterSmart (65F-70F)"
label var temp_70 "Control Use (70F-75F)"
label var temp_ws_70 "WaterSmart (70F-75F)"
label var temp_75 "Control Use (75F-80F)"
label var temp_ws_75 "WaterSmart (75F-80F)"
label var temp_80 "Control Use (80F-85F)"
label var temp_ws_80 "WaterSmart (80F-85F)"
label var temp_85 "Control Use (85F-90F)"
label var temp_ws_85 "WaterSmart (85F-90F)"

*Updating for cutoff at below 65 and above 85
replace temp_60=1 if temp<=60
replace temp_ws_60=1 if temp<=60 & ws==1
replace temp_85=1 if temp>=85
replace temp_ws_85=1 if temp>=85 & ws==1

*Precipitation Indicators
gen precip_0=0
	replace precip_0=1 if precip==0
gen precip_1=0
	replace precip_1=1 if precip>0 
 
 
****************
*SORTING 
sort HHID date_h
drop month year temp temp_mean temp_max temp_min precip precip_daily
drop if flag_outlier==1
drop if flag_euse==1
drop if elec_use_summer==.
compress
